import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlite3
import requests
from PIL import Image
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
db_loc = 'data/bgg_data.sqlite'
def query_db(statement):
'''
Run the given query against the local Sqlite database file, and return the results in a DataFrame.
'''
conn = sqlite3.connect(db_loc)
try:
df = pd.read_sql(statement, conn)
finally:
conn.close()
return df
def plot_category_bar(df, col_name, n, title_col, figsize=(12, 8)):
'''
For the given DataFrame, plot a bar chart counting all instances for the given column,
then plot a similar bar chart for the column for each name in the `person` column.
'''
df_count = df.groupby(col_name).size().reset_index(name='count').nlargest(n, 'count')
labels = df[col_name].unique()
# assign a color to each label value for consistency
palette = dict(zip(labels, sns.color_palette(n_colors=len(labels))))
# plot the chart for the total distribution
f = plt.figure(1, figsize=figsize)
f.suptitle(f'Total {title_col} distribution')
sns.barplot(data=df_count, x=col_name, y='count', palette=palette)
plt.xticks(rotation=90)
users = df['person'].unique()
df_count = df.groupby(['person', col_name]).size().reset_index(name='count')
# we can't guarantee that the labels match for each user
# therefore, plot a new chart for each user
fig_count = 2
for user in users:
f = plt.figure(fig_count, figsize=figsize)
user_count = df_count[df_count['person'] == user].nlargest(n, 'count')
sns.barplot(data=user_count, x=col_name, y='count', palette=palette)
plt.xticks(rotation=90)
f.suptitle(f'{user} {title_col} distribution')
fig_count += 1
plt.show()
def download_image(url):
'''Download the image from the given URL, then return it in an Image object.'''
r = requests.get(url, stream=True)
im = Image.open(r.raw)
return im
query_db(
"""
select *
from user_ranks
inner join games
on user_ranks.game_id = games.id
where user_ranks.name =='Jess'
""")
| name | game_id | rank | id | name | year | playing_time | rating | weight | bgg_rank | best_count | min_count | max_count | expansions | users_rated | img_url | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Jess | 28143 | 1 | 28143 | Race for the Galaxy | 2007 | 60 | 7.75687 | 2.9882 | 67 | 2 | 2 | 4 | 8 | 49463 | https://cf.geekdo-images.com/-DOqixs8uwKUvvWPK... |
| 1 | Jess | 124361 | 2 | 124361 | Concordia | 2013 | 100 | 8.12226 | 3.0098 | 19 | 4 | 2 | 5 | 8 | 32258 | https://cf.geekdo-images.com/CzwSm8i7tkLz6cBnr... |
| 2 | Jess | 84876 | 3 | 84876 | The Castles of Burgundy | 2011 | 90 | 8.12637 | 2.9977 | 15 | 2 | 2 | 4 | 14 | 51485 | https://cf.geekdo-images.com/5CFwjd8zTcGYVUnkX... |
| 3 | Jess | 21241 | 4 | 21241 | Neuroshima Hex! 3.0 | 2006 | 30 | 7.39981 | 2.6157 | 304 | 2 | 1 | 4 | 35 | 13927 | https://cf.geekdo-images.com/8KB-cVXjNmYA6xuw-... |
| 4 | Jess | 215 | 5 | 215 | Tichu | 1991 | 60 | 7.57592 | 2.3391 | 191 | 4 | 4 | 4 | 2 | 14009 | https://cf.geekdo-images.com/gz8_8iYP2SSGVAxpp... |
| 5 | Jess | 31260 | 6 | 31260 | Agricola | 2007 | 150 | 7.92074 | 3.6395 | 39 | 4 | 1 | 5 | 32 | 66685 | https://cf.geekdo-images.com/dDDo2Hexl80ucK1Il... |
| 6 | Jess | 233078 | 7 | 233078 | Twilight Imperium: Fourth Edition | 2017 | 480 | 8.66749 | 4.2581 | 6 | 6 | 3 | 6 | 3 | 16587 | https://cf.geekdo-images.com/_Ppn5lssO5OaildSE... |
| 7 | Jess | 146652 | 8 | 146652 | Legendary Encounters: An Alien Deck Building Game | 2014 | 60 | 7.77406 | 2.7054 | 127 | 3 | 1 | 5 | 3 | 12101 | https://cf.geekdo-images.com/jSz_KRUxsjGYitoqx... |
| 8 | Jess | 48726 | 9 | 48726 | Alien Frontiers | 2010 | 90 | 7.39297 | 2.5694 | 269 | 3 | 2 | 4 | 19 | 14570 | https://cf.geekdo-images.com/UWZTWLXFfg_6HpXJm... |
| 9 | Jess | 41 | 10 | 41 | Can't Stop | 1980 | 30 | 6.85736 | 1.1542 | 755 | 3 | 2 | 4 | 1 | 14817 | https://cf.geekdo-images.com/JZpo3AnyFH4TqxKWF... |
| 10 | Jess | 30549 | 11 | 30549 | Pandemic | 2008 | 45 | 7.58384 | 2.4058 | 110 | 4 | 2 | 4 | 7 | 110450 | https://cf.geekdo-images.com/S3ybV1LAp-8SnHIXL... |
| 11 | Jess | 167791 | 12 | 167791 | Terraforming Mars | 2016 | 120 | 8.41486 | 3.2462 | 4 | 3 | 1 | 5 | 23 | 76364 | https://cf.geekdo-images.com/wg9oOLcsKvDesSUdZ... |
| 12 | Jess | 2655 | 13 | 2655 | Hive | 2000 | 20 | 7.32242 | 2.3223 | 262 | 2 | 2 | 2 | 22 | 32195 | https://cf.geekdo-images.com/fQe85tsBZoH6ibPnm... |
| 13 | Jess | 822 | 14 | 822 | Carcassonne | 2000 | 45 | 7.41729 | 1.9047 | 197 | 2 | 2 | 5 | 160 | 110291 | https://cf.geekdo-images.com/okM0dq_bEXnbyQTOv... |
| 14 | Jess | 153938 | 15 | 153938 | Camel Up | 2014 | 30 | 7.04189 | 1.4759 | 510 | 5 | 2 | 8 | 4 | 24289 | https://cf.geekdo-images.com/1ph2jVOD1MudR1fK1... |
| 15 | Jess | 172 | 16 | 172 | For Sale | 1997 | 30 | 7.22917 | 1.2588 | 320 | 5 | 3 | 6 | 2 | 26120 | https://cf.geekdo-images.com/dJh9HkZC346NgPTAi... |
| 16 | Jess | 10630 | 17 | 10630 | Memoir '44 | 2004 | 60 | 7.55588 | 2.2712 | 154 | 2 | 2 | 8 | 32 | 27104 | https://cf.geekdo-images.com/chfLNSL0l7afCA5go... |
| 17 | Jess | 96848 | 18 | 96848 | Mage Knight Board Game | 2011 | 240 | 8.09556 | 4.3309 | 29 | 1 | 1 | 4 | 4 | 29748 | https://cf.geekdo-images.com/DUO2hz9AlLOH8p9ED... |
| 18 | Jess | 39463 | 19 | 39463 | Cosmic Encounter | 2008 | 120 | 7.53787 | 2.5772 | 165 | 5 | 3 | 5 | 7 | 29163 | https://cf.geekdo-images.com/S8cE-Ld7XP5sVz-up... |
| 19 | Jess | 265188 | 20 | 265188 | Glen More II: Chronicles | 2019 | 120 | 7.98949 | 3.0583 | 166 | 3 | 2 | 4 | 4 | 5367 | https://cf.geekdo-images.com/sMRuph3HFUcaSpJZi... |
| 20 | Jess | 98778 | 21 | 98778 | Hanabi | 2010 | 25 | 7.06822 | 1.6939 | 432 | 4 | 2 | 5 | 4 | 42205 | https://cf.geekdo-images.com/JDVksMwfcqoem1k_x... |
| 21 | Jess | 161936 | 22 | 161936 | Pandemic Legacy: Season 1 | 2015 | 60 | 8.58862 | 2.8305 | 2 | 4 | 2 | 4 | 0 | 45806 | https://cf.geekdo-images.com/-Qer2BBPG7qGGDu6K... |
| 22 | Jess | 2453 | 23 | 2453 | Blokus | 2000 | 20 | 6.87211 | 1.7612 | 699 | 4 | 2 | 4 | 0 | 24381 | https://cf.geekdo-images.com/96YA5wUJDxtPkmPxe... |
| 23 | Jess | 244521 | 24 | 244521 | The Quacks of Quedlinburg | 2018 | 45 | 7.85208 | 1.9506 | 57 | 4 | 2 | 4 | 5 | 31480 | https://cf.geekdo-images.com/pH5LgRL4mNRon-2Ns... |
| 24 | Jess | 8217 | 25 | 8217 | San Juan | 2004 | 60 | 7.24649 | 2.2863 | 325 | 3 | 2 | 4 | 2 | 22812 | https://cf.geekdo-images.com/kiD03bslLpN_0Qe1L... |
| 25 | Jess | 233867 | 26 | 233867 | Welcome To... | 2018 | 25 | 7.59263 | 1.8242 | 138 | 4 | 1 | 100 | 14 | 22494 | https://cf.geekdo-images.com/g4XmxyKhNVdhC3QPd... |
| 26 | Jess | 128882 | 27 | 128882 | The Resistance: Avalon | 2012 | 30 | 7.55872 | 1.7645 | 156 | 7 | 5 | 10 | 2 | 29204 | https://cf.geekdo-images.com/LPa6rsGcv8S0-OeNj... |
| 27 | Jess | 37380 | 28 | 37380 | Roll Through the Ages: The Bronze Age | 2008 | 45 | 6.86454 | 1.7807 | 785 | 2 | 1 | 4 | 1 | 11726 | https://cf.geekdo-images.com/VoLyy7ScO6aAU9Neo... |
| 28 | Jess | 183394 | 29 | 183394 | Viticulture Essential Edition | 2015 | 90 | 8.05300 | 2.8898 | 25 | 4 | 1 | 6 | 6 | 36038 | https://cf.geekdo-images.com/l_PRU2lVlX9seScRF... |
| 29 | Jess | 262543 | 30 | 262543 | Wavelength | 2019 | 45 | 7.42383 | 1.1193 | 405 | 6 | 2 | 12 | 1 | 6901 | https://cf.geekdo-images.com/z4fbPdmJg_5yphJEv... |
| 30 | Jess | 157969 | 31 | 157969 | Sheriff of Nottingham | 2014 | 60 | 7.11653 | 1.6529 | 412 | 5 | 3 | 5 | 16 | 31896 | https://cf.geekdo-images.com/BBgLFKUzr6tcKtlIM... |
| 31 | Jess | 92415 | 32 | 92415 | Skull | 2011 | 45 | 7.20158 | 1.1304 | 400 | 5 | 3 | 6 | 1 | 17072 | https://cf.geekdo-images.com/OPrd2iXm43dir7BwK... |
| 32 | Jess | 98351 | 33 | 98351 | Core Worlds | 2011 | 120 | 7.17473 | 2.8941 | 805 | 3 | 2 | 5 | 9 | 4084 | https://cf.geekdo-images.com/Yim5NN5RexitX1ZaA... |
| 33 | Jess | 36218 | 34 | 36218 | Dominion | 2008 | 30 | 7.60768 | 2.3542 | 106 | 3 | 2 | 4 | 48 | 82410 | https://cf.geekdo-images.com/j6iQpZ4XkemZP07HN... |
| 34 | Jess | 34635 | 35 | 34635 | Stone Age | 2008 | 90 | 7.55154 | 2.4701 | 130 | 4 | 2 | 4 | 7 | 47790 | https://cf.geekdo-images.com/elmZegVZ6gp4_5izU... |
| 35 | Jess | 70323 | 36 | 70323 | King of Tokyo | 2011 | 30 | 7.16593 | 1.4902 | 350 | 4 | 2 | 6 | 17 | 62106 | https://cf.geekdo-images.com/m_RzXpHURC0_xLkvR... |
| 36 | Jess | 42215 | 37 | 42215 | Tobago | 2009 | 60 | 7.10627 | 2.1281 | 532 | 4 | 2 | 4 | 1 | 11236 | https://cf.geekdo-images.com/fuC2C10_6QK84R45M... |
| 37 | Jess | 12942 | 38 | 12942 | No Thanks! | 2004 | 20 | 7.05878 | 1.1362 | 473 | 5 | 3 | 7 | 2 | 22310 | https://cf.geekdo-images.com/2DyxMVASynw87LRUL... |
| 38 | Jess | 50 | 39 | 50 | Lost Cities | 1999 | 30 | 7.19520 | 1.4901 | 329 | 2 | 2 | 2 | 2 | 40109 | https://cf.geekdo-images.com/AL5D-dXabY-Lk3PqI... |
| 39 | Jess | 254640 | 40 | 254640 | Just One | 2018 | 20 | 7.61912 | 1.0450 | 146 | 6 | 3 | 7 | 5 | 16771 | https://cf.geekdo-images.com/qKcKjHpzAvLddwBhy... |
| 40 | Jess | 203420 | 41 | 203420 | Exit: The Game – The Abandoned Cabin | 2016 | 120 | 7.49118 | 2.5461 | 267 | 2 | 1 | 6 | 0 | 9908 | https://cf.geekdo-images.com/r7cZVZMpP28w3-JQ-... |
| 41 | Jess | 160477 | 42 | 160477 | Onitama | 2014 | 20 | 7.38294 | 1.6807 | 270 | 2 | 2 | 2 | 9 | 17724 | https://cf.geekdo-images.com/FZdihu8SibqQYtwfo... |
| 42 | Jess | 163412 | 43 | 163412 | Patchwork | 2014 | 30 | 7.63233 | 1.6213 | 96 | 2 | 2 | 2 | 3 | 53518 | https://cf.geekdo-images.com/wLW7pFn0--20lEizE... |
| 43 | Jess | 152162 | 44 | 152162 | Diamonds: Second Edition | 2014 | 30 | 6.92018 | 1.5471 | 975 | 4 | 2 | 6 | 1 | 5129 | https://cf.geekdo-images.com/jWa2swVQ1oJ_lmBjO... |
| 44 | Jess | 68448 | 45 | 68448 | 7 Wonders | 2010 | 30 | 7.72904 | 2.3246 | 75 | 4 | 2 | 7 | 16 | 91329 | https://cf.geekdo-images.com/RvFVTEpnbb4NM7k0I... |
| 45 | Jess | 171668 | 46 | 171668 | The Grizzled | 2015 | 30 | 7.17220 | 1.9538 | 459 | 4 | 2 | 5 | 1 | 13238 | https://cf.geekdo-images.com/5JuQCtSAeSZpAL3Wg... |
| 46 | Jess | 2651 | 47 | 2651 | Power Grid | 2004 | 120 | 7.83964 | 3.2652 | 49 | 4 | 2 | 6 | 31 | 59184 | https://cf.geekdo-images.com/yd6LuatytHRhcFCxC... |
| 47 | Jess | 73761 | 48 | 73761 | K2 | 2010 | 60 | 7.03953 | 2.2598 | 637 | 3 | 1 | 5 | 3 | 9678 | https://cf.geekdo-images.com/JlUNpGfe_k7FSYLTr... |
| 48 | Jess | 760 | 49 | 760 | Battle Line | 2000 | 30 | 7.41472 | 1.8992 | 246 | 2 | 2 | 2 | 0 | 16290 | https://cf.geekdo-images.com/RnbZfspkPRfr6EMkR... |
| 49 | Jess | 45 | 50 | 45 | Perudo | 1800 | 30 | 6.90518 | 1.2621 | 733 | 6 | 2 | 6 | 0 | 13362 | https://cf.geekdo-images.com/UCiGSoCqbOsXS8JzZ... |
user_ranks = query_db(
"""
select user_ranks.name as person, user_ranks.rank, games.*
from user_ranks
inner join games
on user_ranks.game_id = games.id
""")
games = query_db('select * from games')
Of all the games in a top 50 list, the most important are the top 10. They are the best of the best, the cream of the crop. Therefore, we give them special precedence here by making a nice display of their cover art.
top10 = user_ranks[user_ranks['rank'] <= 10]
users = list(top10['person'].unique())
cols = len(users) + 1
rows = 10 + 1
gridspec = {
'width_ratios': [1] + ([5] * len(users)), # [1, 5, 5, ...] one 5 per user
'height_ratios': [1] + ([3] * (rows-1)), # [1, 5, 5, ...] one 5 per 50 games,
'wspace': 0.01
}
fig, axes = plt.subplots(rows, cols, figsize=(20, 15), gridspec_kw=gridspec)
fig.tight_layout(pad=10.0)
plt.subplots_adjust(top=5)
# top left corner
axes[0, 0].axis('off')
# header row
row_idx = 1
for user in users:
ax = axes[0, row_idx]
ax.set_title(user)
ax.axis('off')
row_idx += 1
# top 50 list rows
for rank, group in top10.groupby('rank'):
# ranking number display
ax = axes[rank, 0]
ax.set_title(f'#{rank}')
ax.axis('off')
# games per user
row_idx = 1
for user in users:
row = group[group['person'] == user]
url = row['img_url'].item()
title = row['name'].item()
title = (title[:25] + '...') if len(title) > 25 else title
img = download_image(url)
ax = axes[rank, row_idx]
ax.imshow(img)
ax.set_title(title)
ax.axis('off')
row_idx += 1
plt.show()
user_ranks[['person', 'name', 'rank']].pivot(index='rank', columns='person')
| name | ||||
|---|---|---|---|---|
| person | Darin | Jess | Seth | Zarin |
| rank | ||||
| 1 | The Castles of Burgundy | Race for the Galaxy | Terra Mystica | Terra Mystica |
| 2 | 7 Wonders Duel | Concordia | Magic: The Gathering | Dune: Imperium |
| 3 | Fury of Dracula (Third/Fourth Edition) | The Castles of Burgundy | Tichu | Lost Ruins of Arnak |
| 4 | Tzolk'in: The Mayan Calendar | Neuroshima Hex! 3.0 | Lorenzo il Magnifico | Terraforming Mars |
| 5 | Viticulture Essential Edition | Tichu | Lost Ruins of Arnak | Brass: Birmingham |
| 6 | Gloomhaven | Agricola | The Quacks of Quedlinburg | Great Western Trail |
| 7 | Clank!: Legacy – Acquisitions Incorporated | Twilight Imperium: Fourth Edition | Everdell | Dominion |
| 8 | Targi | Legendary Encounters: An Alien Deck Building Game | Tzolk'in: The Mayan Calendar | Mansions of Madness: Second Edition |
| 9 | Pandemic Legacy: Season 1 | Alien Frontiers | The Castles of Burgundy | Concordia |
| 10 | Dominion | Can't Stop | Tales of the Arabian Nights | Tichu |
| 11 | Orléans | Pandemic | Grand Austria Hotel | The Quacks of Quedlinburg |
| 12 | Clank! In! Space!: A Deck-Building Adventure | Terraforming Mars | Concordia | Teotihuacan: City of Gods |
| 13 | 7 Wonders | Hive | Great Western Trail | Spirit Island |
| 14 | Twilight Struggle | Carcassonne | Age of Empires III: The Age of Discovery | Cascadia |
| 15 | Wingspan | Camel Up | Architects of the West Kingdom | The Castles of Burgundy |
| 16 | Azul | For Sale | Glen More II: Chronicles | Sagrada |
| 17 | Splendor | Memoir '44 | Honey Buzz | Orléans |
| 18 | Dice Forge | Mage Knight Board Game | Alien Frontiers | Cryptid |
| 19 | Power Grid | Cosmic Encounter | Just One | Caverna: The Cave Farmers |
| 20 | Lost Ruins of Arnak | Glen More II: Chronicles | Fresco | Res Arcana |
| 21 | Terra Mystica | Hanabi | Power Grid | In the Hall of the Mountain King |
| 22 | Puerto Rico | Pandemic Legacy: Season 1 | Rajas of the Ganges | Splendor |
| 23 | Haven | Blokus | Twice as Clever! | Between Two Cities |
| 24 | Notre Dame | The Quacks of Quedlinburg | Clinic: Deluxe Edition | Lords of Waterdeep |
| 25 | Nertz | San Juan | Maracaibo | Stone Age |
| 26 | Bezzerwizzer | Welcome To... | Dinosaur Island | Imperial Settlers: Empires of the North |
| 27 | Istanbul | The Resistance: Avalon | Dominant Species | Roll Player |
| 28 | Century: Spice Road | Roll Through the Ages: The Bronze Age | Automania | Raiders of the North Sea |
| 29 | Just One | Viticulture Essential Edition | Agricola | 7 Wonders Duel |
| 30 | Codenames | Wavelength | Roll Player | Suburbia |
| 31 | Castles of Mad King Ludwig | Sheriff of Nottingham | Teotihuacan: City of Gods | Five Tribes |
| 32 | Mysterium | Skull | Orléans | Azul |
| 33 | Glen More II: Chronicles | Core Worlds | The Artemis Project | Power Grid |
| 34 | Stone Age | Dominion | Viticulture Essential Edition | Tzolk'in: The Mayan Calendar |
| 35 | Letters from Whitechapel | Stone Age | The Mind | Scythe |
| 36 | Sheriff of Nottingham | King of Tokyo | 7 Wonders Duel | Kenjin |
| 37 | Lanterns: The Harvest Festival | Tobago | The Lord of the Rings: Journeys in Middle-Earth | Dice Miner |
| 38 | Pandemic | No Thanks! | For Sale | Castles of Mad King Ludwig |
| 39 | Patchwork | Lost Cities | Carcassonne | Potion Explosion |
| 40 | Photosynthesis | Just One | Android: Netrunner | High Society |
| 41 | Hardback | Exit: The Game – The Abandoned Cabin | Dominion | Tiny Towns |
| 42 | Res Arcana | Onitama | Roll Through the Ages: The Bronze Age | Space Base |
| 43 | San Juan (Second Edition) | Patchwork | Telestrations | Just One |
| 44 | The Resistance: Avalon | Diamonds: Second Edition | The 7th Continent | Sushi Go Party! |
| 45 | Onitama | 7 Wonders | War of the Ring: Second Edition | Skull |
| 46 | Jaipur | The Grizzled | Wavelength | Kingdomino |
| 47 | Cribbage | Power Grid | Stone Age | Istanbul |
| 48 | Ticket to Ride: Europe | K2 | Can't Stop | Glass Road |
| 49 | Hive | Battle Line | Catchphrase | Bruges |
| 50 | Santorini | Perudo | Last Will | The Crew: The Quest for Planet Nine |
In this section, we check how many games appeared on multiple individual's lists. Each listed grouping shows the games that only they had in common.
# find every game with more than one person having it in their list
# join the users with commas
cross = user_ranks[['person', 'id']].groupby('id')['person'].apply(','.join).reset_index(name='people')
# get the count by counting the commas + 1 (e.g. "Person1,Person2" -> 2)
cross['count'] = cross['people'].str.count(',') + 1
# filter games to include only ones with crossover
cross = cross[cross['count'] > 1].sort_values('count', ascending=False)
# join with the games DataFrame to get the name
cross = pd.merge(cross, games, how='inner', on=['id', 'id'])
# display the images for each group
# todo: order this by number in the group (i.e. show crossover between group before pair)
for users, group in cross.groupby('people'):
users = users.split(',')
print('-' * 100)
print('Crossovers exclusive to %s' % ' & '.join(users))
urls = list(group['img_url'])
titles = list(group['name'])
# set width / height for the image grid
# note: a maximum of 25 images will be shown (not anticipating more than that anyway)
width = 5
height = 5
axes = []
fig = plt.figure(figsize=(16,16))
for a in range(min(len(urls), width * height)):
axes.append(fig.add_subplot(height, width, a+1))
url = urls[a]
title = titles[a]
img = download_image(url)
plt.imshow(img)
axes[-1].set_title(title)
plt.axis('off')
fig.tight_layout()
plt.show()
---------------------------------------------------------------------------------------------------- Crossovers exclusive to Darin & Jess
---------------------------------------------------------------------------------------------------- Crossovers exclusive to Darin & Jess & Seth
---------------------------------------------------------------------------------------------------- Crossovers exclusive to Darin & Jess & Seth & Zarin
---------------------------------------------------------------------------------------------------- Crossovers exclusive to Darin & Seth & Zarin
---------------------------------------------------------------------------------------------------- Crossovers exclusive to Darin & Zarin
---------------------------------------------------------------------------------------------------- Crossovers exclusive to Jess & Seth
---------------------------------------------------------------------------------------------------- Crossovers exclusive to Jess & Seth & Zarin
---------------------------------------------------------------------------------------------------- Crossovers exclusive to Jess & Zarin
---------------------------------------------------------------------------------------------------- Crossovers exclusive to Seth & Zarin
user_ranks = query_db(
"""
select user_ranks.name as person, user_ranks.rank, games.*
from user_ranks
inner join games
on user_ranks.game_id = games.id
""")
user_ranks['bgg_rank'] = user_ranks['bgg_rank'].astype(int)
In the following section, we compare each individual's ranking for a game against its ranking on Boardgamegeek. Because significant outliers in Boardgamegeek ranking can adversely affect the charts, we ignore any that are above the rank 2000. Omitting these games should have only a small effect on the results.
MAX_BGG_RANK = 2000
rankings_in_range = user_ranks[user_ranks['bgg_rank'] < MAX_BGG_RANK]
over_2000 = user_ranks[user_ranks['bgg_rank'] >= MAX_BGG_RANK]
if not over_2000.empty:
print('Ignoring %d element(s) with BGG rank > 2000:' % len(over_2000))
print(over_2000[['person', 'name', 'bgg_rank']])
Ignoring 6 element(s) with BGG rank > 2000:
person name bgg_rank
22 Darin Haven 2391
24 Darin Nertz 5726
25 Darin Bezzerwizzer 2504
148 Seth Catchphrase 9256
185 Zarin Kenjin 5416
186 Zarin Dice Miner 3032
Here we plot each individual's rankings against each game's Boardgamegeek ranking. The more closely the points resemble a straight line, the more the individual's rankings match up with those on Boardgamegeek. The charts include a best fit line and a highlighted confidence interval. A larger confidence interval indicates that the rankings do not closely match the relative ordering for the games' BGG rankings.
The confidence intervals shown should be taken with a grain of salt, because they are greatly affected by outliers, which were removed to make these charts readable.
sns.lmplot(x='rank', y='bgg_rank', data=rankings_in_range, col='person')
plt.suptitle("How do each user's rankings match against BGG?")
plt.tight_layout()
plt.show()
Here we see a box and whisker chart, showing the median and quartile ranges, as well as any outliers.
plt.figure(figsize=(16, 9))
plt.title('BGG rank distribution per user')
sns.boxplot(x='person', y='bgg_rank', data=rankings_in_range, boxprops=dict(alpha=0.6))
sns.swarmplot(x='person', y='bgg_rank', data=rankings_in_range, color='0.25')
plt.ylabel('BGG Rank')
plt.xlabel('')
plt.show()
Finally, we have a breakdown of how many games were in different ranges of BGG rankings. The total ranking distribution does include multiple instances of the same game if there is crossover. Because of this, these charts should be seen as "how many game rankings were in this range?", rather than "how many games were in this range?".
top_100 = user_ranks[['person', 'bgg_rank']]
top_100['rank'] = pd.cut(top_100['bgg_rank'], bins=[0, 100, 500, np.inf], labels=['Top 100', '101-500', '500+'])
top_100 = top_100.groupby(['person', 'rank']).size().unstack(fill_value=0)
labels = top_100.columns
# pull out the pie portion for top 100
explode = [0.1 if col == 'Top 100' else 0 for col in labels]
fig, axes = plt.subplots(2, len(top_100.index), figsize=(12, 9))
fig.tight_layout()
# zero out upper row
for i in range(len(top_100.index)):
axes[0, i].axis('off')
# set upper left corner to show total distribution
axes[0, 0].set_title('Total ranking distribution')
axes[0, 0].pie(top_100.sum(), labels=labels, explode=explode, autopct='%1.1f%%', startangle=90, shadow=True)
# set second row to show individual distributions
fig_count = 0
for i in top_100.index:
axes[1, fig_count].set_title(f'{i} ranking distribution')
axes[1, fig_count].pie(top_100.loc[i], labels=labels, explode=explode, autopct='%1.1f%%', startangle=90, shadow=True)
fig_count += 1
In this section we look at Boardgamegeek's "weight" for the selected games. The values range from 1 to 5, with more complex games scoring higher.
In the violin plots below, we can see the mean and quartile ranges as in the box and whisker plot, as well as the distribution of the data around that weight, shown as the width of the plot.
plt.figure(figsize=(16, 9))
plt.title('Weight distribution per user')
ax = sns.violinplot(x='person', y='weight', data=user_ranks, inner='quartile', scale='count', palette='muted')
sns.swarmplot(x='person', y='weight', data=user_ranks, color='0.25')
plt.ylabel('Weight')
plt.xlabel('')
plt.show()
In this section we will investigate the best player counts for games. Each game can support a certain number of players, and users on Boardgamegeek can vote "Best", "Recommended", and "Not Recommended" for each player count. Because a game may have multiple "best" player counts, we only consider the player count with the most "Best" votes as being the best for the purposes of this analysis.
plt.figure(figsize=(16, 16))
counts_by_user = user_ranks.groupby(['person', 'best_count']).size().reset_index(name='count')
sns.catplot(data=counts_by_user, kind='bar', x='best_count', y='count', hue='person', alpha=0.9, height=12)
plt.title('Players counts by person')
plt.xlabel('Player count')
plt.ylabel('Total')
plt.show()
<Figure size 1152x1152 with 0 Axes>
In this section, we investigate how each person's game selections break down by release dates. With such a wide range of possible dates, each chart only shows games from 1990 to today (selected semi-arbitrarily), but the omitted years for each individual are printed out above the charts.
MIN_YEAR = 1990
MAX_YEAR = 2021
users = user_ranks['person'].unique()
fig, axes = plt.subplots(nrows=len(users), ncols=1, figsize=(16, 25))
index = 0
for user in users:
# get the games for this person, group them by year and count them
by_year = user_ranks[user_ranks['person'] == user].groupby('year').size().reset_index(name='count')
# do we have any that are below the minimum year?
# if so, print them out with a nice little note
below_limit = by_year[by_year['year'] < MIN_YEAR]['year'].unique()
if below_limit.any():
print(f'Note - not including games for {user} from the following years for readability:')
for year in below_limit:
print(f' - {year}')
# filter to recent years to make the graph readable
by_year = by_year[by_year['year'] >= MIN_YEAR]
# add values for missing years to spread out the bars
for year in range(MIN_YEAR, MAX_YEAR+1):
if year not in by_year['year'].values:
by_year = by_year.append({'year': year, 'count': 0}, ignore_index=True)
barplot = sns.barplot(data=by_year, x='year', y='count', ax=axes[index])
barplot.set_xticklabels(barplot.get_xticklabels(), rotation=90, horizontalalignment='center')
axes[index].set_title(f"{user}'s top 50 games by year")
axes[index].set_ylabel('Total')
axes[index].set_xlabel('Release date')
index += 1
plt.tight_layout()
plt.show()
Note - not including games for Darin from the following years for readability: - 1630 - 1930 Note - not including games for Jess from the following years for readability: - 1800 - 1980 Note - not including games for Seth from the following years for readability: - 1974 - 1980
We can also evaluate each individual's list by their relative popularity by using the game's number of ratings on Boardgamegeek. Games with a low number of ratings will likely either be unpopular games or very new games, so we can use this metric to determine which individual has the most "underground" or "cult of the new" list.
low_counts = user_ranks.sort_values('users_rated', ascending=True)
low_counts = low_counts[['person', 'name', 'year', 'users_rated']]
low_counts.head(10)
| person | name | year | users_rated | |
|---|---|---|---|---|
| 148 | Seth | Catchphrase | 1974 | 190 |
| 185 | Zarin | Kenjin | 2015 | 400 |
| 24 | Darin | Nertz | 1930 | 418 |
| 186 | Zarin | Dice Miner | 2021 | 599 |
| 22 | Darin | Haven | 2018 | 951 |
| 132 | Seth | The Artemis Project | 2019 | 1621 |
| 123 | Seth | Clinic: Deluxe Edition | 2020 | 1662 |
| 116 | Seth | Honey Buzz | 2020 | 2182 |
| 127 | Seth | Automania | 2015 | 2304 |
| 40 | Darin | Hardback | 2018 | 2801 |
Here we see a plot of the popularity distribution for each user.
plt.figure(figsize=(16, 9))
plt.title('User ratings distribution per user')
ax = sns.boxplot(x='person', y='users_rated', data=user_ranks, palette='muted')
sns.swarmplot(x='person', y='users_rated', data=user_ranks, color='0.25')
plt.ylabel('Users Rated')
plt.xlabel('')
plt.show()
On Boardgamegeek, every game is labeled with a set of mechanics, such as "card drafting" or "worker placement". In this section, we look at how each person's selections are distributed with respect to these mechanics.
Note that every game can have a large number of listed mechanics. For example, a simple game like No Thanks! has five associated mechanisms, and a complex game like "Twilight Imperium 4" lists twelve. Because of this disparity, this analysis becomes biased toward more complex games, as they will tend to have more mechanics. In addition, some of the mechanics are so generic as to describe just about anything in a board game. Taking the top n mechanics for a game was discussed, but it turns out that this is not a valid approach. As an example, the top two mechanics listed for Ticket To Ride are "Card Drafting" and "End Game Bonuses". These are mechanisms present in the game, but a better description would probably come from the 3rd, 4th, and 6th in the list, "Hand Management", "Network and Route Building", and "Set Collection". Therefore, all mechanics for a game are included in the analysis.
mechanics = query_db(
"""
select user_ranks.name as person, games.name, mechanics.mechanic
from user_ranks
inner join games
on user_ranks.game_id = games.id
inner join mechanics
on games.id = mechanics.game_id
""")
Below, we chart the 10 mechanics that appear the most overall, as well as the mechanic distribution for each individual. As above with the BGG ranking distributions, the total distribution does count games multiple times. Loosely, this means that the total chart should be interpreted as answering the question "how many game selections were made with this mechanic?"
The charts all have high counts for mechanics like "Hand Management" and "Dice Rolling", but this information is all but useless to us, because just about every game that includes cards or dice requires you to manage those cards or roll the dice.
plot_category_bar(mechanics, 'mechanic', title_col='mechanic', n=10)
In this section, we analyze statistics about each game's "family". On Boardgamegeek, a family can refer to a high-level mechanism (e.g. 4X), certain types of components (e.g. miniatures), or many other groupings. Looking at the families overall will not give useful information, so each subfamily will be looked at individually below.
families = query_db(
"""
select user_ranks.name as person, games.name, families.family
from user_ranks
inner join games
on user_ranks.game_id = games.id
inner join families
on games.id = families.game_id
""")
The board game community has wholeheartedly embraced Kickstarter, for better or worse. The "Crowdfunding: Kickstarter" family notes a game that has been released on the platform. Here we ask the simple question, "Which person has the most Kickstarter games on their list?"
kickstarter_count = families.groupby(['person', 'family']).size().reset_index(name='count')
kickstarter_count = kickstarter_count[kickstarter_count['family'].str.contains('Kickstarter')]
sns.barplot(x='person', y='count', data=kickstarter_count)
plt.title('Which person has the most Kickstarter games?')
plt.show()
The "Component:" family is used to categorize the physical construction of the game, and how they are used.
component_family = families[families['family'].str.startswith('Components')]
# remove 'Components: ' from the start of the family name
component_family['family'] = component_family['family'].str.replace('Components: ', '')
plot_category_bar(component_family, col_name='family', n=5, title_col='Component family', figsize=(6, 6))
On Boardgamegeek, a game can be be assigned a set of categories that define the game at a high-level. These categories may describe the overall theme of the game (e.g. Pandemic falls under the "Medical" category), or they may describe what kind of a game it is (e.g. The Resistance is in the "Party Game" category).
categories = query_db(
"""
select user_ranks.name as person, games.name, categories.category
from user_ranks
inner join games
on user_ranks.game_id = games.id
inner join categories
on games.id = categories.game_id
""")
The counts in this section are by user inclusions, not by game. To reiterate what has been said already in earlier sections, the overall chart should be seen as answering the question "how many selected games fall under this category?"
plot_category_bar(categories, col_name='category', n=10, title_col='Game Category', figsize=(6, 6))